First we get the metaviper predictions, LV scores, and Random Forest weights from Synapse
#get immune predictions
dtab<-synapser::synTableQuery(paste('select * from',mp_scores))$asDataFrame()%>%
subset(isCellLine!='TRUE')
##
Downloading [#-------------------]7.43% 2.0MB/26.9MB (1.5MB/s) Job-99866576920768443768110964.csv
Downloading [###-----------------]14.85% 4.0MB/26.9MB (1.7MB/s) Job-99866576920768443768110964.csv
Downloading [####----------------]22.28% 6.0MB/26.9MB (1.8MB/s) Job-99866576920768443768110964.csv
Downloading [######--------------]29.71% 8.0MB/26.9MB (1.8MB/s) Job-99866576920768443768110964.csv
Downloading [#######-------------]37.13% 10.0MB/26.9MB (1.9MB/s) Job-99866576920768443768110964.csv
Downloading [#########-----------]44.56% 12.0MB/26.9MB (1.9MB/s) Job-99866576920768443768110964.csv
Downloading [##########----------]51.99% 14.0MB/26.9MB (1.9MB/s) Job-99866576920768443768110964.csv
Downloading [############--------]59.41% 16.0MB/26.9MB (1.9MB/s) Job-99866576920768443768110964.csv
Downloading [#############-------]66.84% 18.0MB/26.9MB (1.9MB/s) Job-99866576920768443768110964.csv
Downloading [###############-----]74.27% 20.0MB/26.9MB (2.0MB/s) Job-99866576920768443768110964.csv
Downloading [################----]81.69% 22.0MB/26.9MB (2.0MB/s) Job-99866576920768443768110964.csv
Downloading [##################--]89.12% 24.0MB/26.9MB (2.0MB/s) Job-99866576920768443768110964.csv
Downloading [###################-]96.54% 26.0MB/26.9MB (2.0MB/s) Job-99866576920768443768110964.csv
Downloading [####################]100.00% 26.9MB/26.9MB (2.0MB/s) Job-99866576920768443768110964.csv Done...
##get metaviper scores
mtab<-synapser::synTableQuery(paste('select * from',metaviper_scores))$asDataFrame()
##
Building the CSV... [#-------------------]6.97% 102315/1467984
Building the CSV... [#####---------------]22.71% 333429/1467984
Building the CSV... [######--------------]30.27% 444360/1467984
Building the CSV... [#########-----------]45.70% 670803/1467984
Create CSV FileHandle [##########----------]50.14% 736061/1467984
Create CSV FileHandle [####################]100.00% 1467984/1467984 Done...
Downloading [--------------------]2.47% 2.0MB/81.0MB (2.0MB/s) Job-100539297652050599950913762.csv
Downloading [#-------------------]4.94% 4.0MB/81.0MB (2.0MB/s) Job-100539297652050599950913762.csv
Downloading [#-------------------]7.40% 6.0MB/81.0MB (2.0MB/s) Job-100539297652050599950913762.csv
Downloading [##------------------]9.87% 8.0MB/81.0MB (1.9MB/s) Job-100539297652050599950913762.csv
Downloading [##------------------]12.34% 10.0MB/81.0MB (2.0MB/s) Job-100539297652050599950913762.csv
Downloading [###-----------------]14.81% 12.0MB/81.0MB (2.0MB/s) Job-100539297652050599950913762.csv
Downloading [###-----------------]17.27% 14.0MB/81.0MB (2.1MB/s) Job-100539297652050599950913762.csv
Downloading [####----------------]19.74% 16.0MB/81.0MB (2.1MB/s) Job-100539297652050599950913762.csv
Downloading [####----------------]22.21% 18.0MB/81.0MB (2.2MB/s) Job-100539297652050599950913762.csv
Downloading [#####---------------]24.68% 20.0MB/81.0MB (2.3MB/s) Job-100539297652050599950913762.csv
Downloading [#####---------------]27.14% 22.0MB/81.0MB (2.4MB/s) Job-100539297652050599950913762.csv
Downloading [######--------------]29.61% 24.0MB/81.0MB (2.5MB/s) Job-100539297652050599950913762.csv
Downloading [######--------------]32.08% 26.0MB/81.0MB (2.6MB/s) Job-100539297652050599950913762.csv
Downloading [#######-------------]34.55% 28.0MB/81.0MB (2.7MB/s) Job-100539297652050599950913762.csv
Downloading [#######-------------]37.01% 30.0MB/81.0MB (2.6MB/s) Job-100539297652050599950913762.csv
Downloading [########------------]39.48% 32.0MB/81.0MB (2.8MB/s) Job-100539297652050599950913762.csv
Downloading [########------------]41.95% 34.0MB/81.0MB (2.9MB/s) Job-100539297652050599950913762.csv
Downloading [#########-----------]44.42% 36.0MB/81.0MB (3.0MB/s) Job-100539297652050599950913762.csv
Downloading [#########-----------]46.88% 38.0MB/81.0MB (3.1MB/s) Job-100539297652050599950913762.csv
Downloading [##########----------]49.35% 40.0MB/81.0MB (3.1MB/s) Job-100539297652050599950913762.csv
Downloading [##########----------]51.82% 42.0MB/81.0MB (3.2MB/s) Job-100539297652050599950913762.csv
Downloading [###########---------]54.29% 44.0MB/81.0MB (3.3MB/s) Job-100539297652050599950913762.csv
Downloading [###########---------]56.76% 46.0MB/81.0MB (3.4MB/s) Job-100539297652050599950913762.csv
Downloading [############--------]59.22% 48.0MB/81.0MB (3.4MB/s) Job-100539297652050599950913762.csv
Downloading [############--------]61.69% 50.0MB/81.0MB (3.5MB/s) Job-100539297652050599950913762.csv
Downloading [#############-------]64.16% 52.0MB/81.0MB (3.6MB/s) Job-100539297652050599950913762.csv
Downloading [#############-------]66.63% 54.0MB/81.0MB (3.6MB/s) Job-100539297652050599950913762.csv
Downloading [##############------]69.09% 56.0MB/81.0MB (3.6MB/s) Job-100539297652050599950913762.csv
Downloading [##############------]71.56% 58.0MB/81.0MB (3.7MB/s) Job-100539297652050599950913762.csv
Downloading [###############-----]74.03% 60.0MB/81.0MB (3.7MB/s) Job-100539297652050599950913762.csv
Downloading [###############-----]76.50% 62.0MB/81.0MB (3.8MB/s) Job-100539297652050599950913762.csv
Downloading [################----]78.96% 64.0MB/81.0MB (3.8MB/s) Job-100539297652050599950913762.csv
Downloading [################----]81.43% 66.0MB/81.0MB (3.9MB/s) Job-100539297652050599950913762.csv
Downloading [#################---]83.90% 68.0MB/81.0MB (3.9MB/s) Job-100539297652050599950913762.csv
Downloading [#################---]86.37% 70.0MB/81.0MB (4.0MB/s) Job-100539297652050599950913762.csv
Downloading [##################--]88.83% 72.0MB/81.0MB (4.0MB/s) Job-100539297652050599950913762.csv
Downloading [##################--]91.30% 74.0MB/81.0MB (4.1MB/s) Job-100539297652050599950913762.csv
Downloading [###################-]93.77% 76.0MB/81.0MB (4.1MB/s) Job-100539297652050599950913762.csv
Downloading [###################-]96.24% 78.0MB/81.0MB (4.2MB/s) Job-100539297652050599950913762.csv
Downloading [####################]98.70% 80.0MB/81.0MB (4.2MB/s) Job-100539297652050599950913762.csv
Downloading [####################]100.00% 81.0MB/81.0MB (4.2MB/s) Job-100539297652050599950913762.csv Done...
##get rf loadings
rftab<-synapser::synTableQuery(paste('select * from',rf_mp))$asDataFrame()%>%
select(LV_Full,`Cutaneous Neurofibroma`,`Neurofibroma`,`Malignant Peripheral Nerve Sheath Tumor`,`Plexiform Neurofibroma`)%>%
mutate(latent_var=gsub('`','',LV_Full))%>%
select(-LV_Full)
##
[####################]100.00% 1/1 Done...
Downloading [####################]100.00% 79.8kB/79.8kB (496.9kB/s) Job-100539604708205143808526086.csv Done...
samps<-intersect(dtab$specimenID,mtab$specimenID)
mp_res<-dtab%>%
subset(specimenID%in%samps)%>%
group_by(latent_var) %>%
mutate(sd_value = sd(value)) %>%
filter(sd_value > 0.05) %>%
ungroup()%>%
select(latent_var,value,specimenID,sd_value,diagnosis)%>%
left_join(rftab,by='latent_var')
combined<-mp_res%>%ungroup()%>%inner_join(mtab,by='specimenID')
#now compute some basic stats
mp_stats<-mp_res%>%
rowwise()%>%mutate(MaxVal=max(`Cutaneous Neurofibroma`,`Plexiform Neurofibroma`,`Malignant Peripheral Nerve Sheath Tumor`,Neurofibroma))%>%
rowwise()%>%
mutate(MeanVal=mean(c(`Cutaneous Neurofibroma`,`Plexiform Neurofibroma`,`Malignant Peripheral Nerve Sheath Tumor`,Neurofibroma)))
DT::datatable(mp_stats)
We can now see which latent variables seem to be ‘active’ in various random forests. Either uniquely for each tumor type or across the board. Let’s take the top 10 across the various tumor types as well as the highest mean value
cols=c("Cutaneous Neurofibroma", "Neurofibroma", 'Malignant Peripheral Nerve Sheath Tumor',"Plexiform Neurofibroma","MeanVal")
top10<-do.call(cbind,lapply(cols,function(x){
nrf<-rename(mp_stats,dis=x)%>%
select(dis,latent_var)%>%distinct()%>%
arrange(desc(dis))%>%select(latent_var)
nrf[1:10,1]
}))
names(top10)<-cols
DT::datatable(top10)
With the top 10 most impactful LVs for each random forest prediction, we can plot those metaviper proteins that correlate with them.
corVals=combined%>%#subset(latent_var%in%unique(unlist(top10)))%>%
group_by(latent_var,gene)%>%
summarize(corVal=cor(value,metaviperscore,use='pairwise.complete.obs'),numSamps=n_distinct(specimenID))
corVals
## # A tibble: 578,740 x 4
## # Groups: latent_var [95]
## latent_var gene corVal numSamps
## <chr> <chr> <dbl> <int>
## 1 1,REACTOME_MRNA_SPLICING AATF 0.531 81
## 2 1,REACTOME_MRNA_SPLICING ABCA1 -0.549 81
## 3 1,REACTOME_MRNA_SPLICING ABCC8 -0.144 81
## 4 1,REACTOME_MRNA_SPLICING ABCC9 -0.590 81
## 5 1,REACTOME_MRNA_SPLICING ABCG1 -0.647 81
## 6 1,REACTOME_MRNA_SPLICING ABCG4 0.108 81
## 7 1,REACTOME_MRNA_SPLICING ABI1 -0.503 81
## 8 1,REACTOME_MRNA_SPLICING ABL1 -0.0134 81
## 9 1,REACTOME_MRNA_SPLICING ABL2 -0.173 81
## 10 1,REACTOME_MRNA_SPLICING ABLIM3 -0.680 81
## # … with 578,730 more rows
##let's store this in Synapse
tab<-synBuildTable('Metaviper Latent-Variable Correlations',parent='syn21046734',corVals)
synStore(tab)
##
Uploading [--------------------]0.00% 0.0bytes/30.4MB file10a284730857
Uploading [#####---------------]26.28% 8.0MB/30.4MB (750.0kB/s) file10a284730857
Uploading [###########---------]52.55% 16.0MB/30.4MB (714.0kB/s) file10a284730857
Uploading [################----]78.83% 24.0MB/30.4MB (686.9kB/s) file10a284730857
Uploading [####################]100.00% 30.4MB/30.4MB (680.3kB/s) file10a284730857 Done...
[####################]100.00% 1/1 Done...
## <synapseclient.table.CsvFileTable object at 0x1236bd5f8>
corVals<-corVals%>%subset(latent_var%in%unique(unlist(top10)))
##now how do we bracket them?
##plot correlation distributions by cell type and method.
require(ggplot2)
##first re-order variables to plot
top.df<-top10%>%rename(All='MeanVal')%>%gather(key="disease",value="pathway")
p<-corVals%>%
ungroup()%>%
subset(latent_var%in%unique(unlist(top10)))%>%
# mutate(LatentVariable = stringr::str_trim(as.character(latent_var), 20))%>%
ggplot()+geom_boxplot(aes(x=latent_var,y=corVal))+ theme(axis.text.x = element_text(angle = 45, hjust = 1))+ggtitle("Correlation of metaviper proteins with lv")
print(p)
There are some proteins that show up as highly correlated. By choosing a threshold, we can evaluate what they are in more detail.
These plots represent the top 10 latent variables for a predictor of each tumor type and the proteins that are correlated with them.
corthresh=0.75
##now filter to the cell types with correlated proteins
cor_cell_types=subset(corVals,corVal>corthresh)%>%
subset(latent_var%in%unique(unlist(top10)))%>%
ungroup()%>%
select(latent_var)%>%
distinct()
print(paste('we found',nrow(cor_cell_types),'lvs with some protein correlation greater than',corthresh))
## [1] "we found 19 lvs with some protein correlation greater than 0.75"
DT::datatable(cor_cell_types)
apply(cor_cell_types,1,function(x){
ct=x[['latent_var']]
# m=x[['method']]
#for each gene and cell type
genes=subset(corVals,latent_var==ct)%>%
subset(corVal>corthresh)%>%
arrange(desc(corVal))%>%
ungroup()
if(nrow(genes)>12){
new.corthresh=format(genes$corVal[12],digits=3)
genes=genes[1:12,]
}else{
new.corthresh=corthresh
}
scores=subset(combined,gene%in%genes$gene)%>%subset(latent_var==ct)
p2<- ggplot(scores)+
geom_point(aes(x=value,y=metaviperscore,
col=gene,shape=tumorType))+
# scale_x_log10()+
ggtitle(paste(ct,'correlation >',new.corthresh,'\n',
subset(top.df,pathway==ct)%>%select(disease)%>%unlist()%>%paste(collapse=',')))
print(p2)
# ggsave(paste0(m,'predictions of',gsub(" ","",gsub("/","",ct)),'cor',new.corthresh,'.pdf'))
})
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
##
## [[17]]
##
## [[18]]
##
## [[19]]
#parentid='syn20710537'
#for(fi in list.files('.')[grep('tions',list.files('.'))])
# synapser::synStore(synapser::File(fi,parentId=parentid,annotations=list(resourceType='analysis',isMultiSpecimen='TRUE',isMultiIndividual='TRUE')),used=c(deconv_scores,metaviper_scores),executed=this.script)